Violent Recidivism
Fit age polynomial
# Build the data used to fit the age polynomial: after dropping rows with a
# -1 decile score and restricting to ages in (18, 70], keep, for each age,
# the row(s) with the lowest violence raw score (ties are kept by top_n).
age_subset <- features %>%
  filter(
    `Risk of Recidivism_decile_score` != -1,
    `Risk of Violence_decile_score` != -1
  ) %>% # Filter 1
  select(p_current_age, p_recid_raw = `Risk of Violence_raw_score`) %>%
  filter(p_current_age > 18 & p_current_age <= 70) %>% # Filter 5
  group_by(p_current_age) %>%
  arrange(p_recid_raw, .by_group = TRUE) %>%
  top_n(n = -1, wt = p_recid_raw) %>% # Filter 6
  # Drop the grouping so age_subset is a plain table for any later use
  ungroup()

# Quartic polynomial in age fitted to the per-age minimum scores.
# Term order is kept as-is so the printed Call and coefficient order match.
mdl_age <- lm(
  p_recid_raw ~
    I(p_current_age^4) +
    I(p_current_age^3) +
    I(p_current_age^2) +
    p_current_age,
  data = age_subset
)
summary(mdl_age)
##
## Call:
## lm(formula = p_recid_raw ~ I(p_current_age^4) + I(p_current_age^3) +
## I(p_current_age^2) + p_current_age, data = age_subset)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.57815 -0.00957 0.00722 0.03974 0.13353
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.554e+00 8.206e-01 1.894 0.061779 .
## I(p_current_age^4) 4.078e-07 2.904e-07 1.404 0.164074
## I(p_current_age^3) -8.818e-05 5.109e-05 -1.726 0.088148 .
## I(p_current_age^2) 7.437e-03 3.227e-03 2.304 0.023731 *
## p_current_age -3.161e-01 8.631e-02 -3.662 0.000441 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.09085 on 82 degrees of freedom
## Multiple R-squared: 0.9799, Adjusted R-squared: 0.979
## F-statistic: 1001 on 4 and 82 DF, p-value: < 2.2e-16
# Report the age-polynomial coefficients with 20 significant decimals
print("Coefficients:")
## [1] "Coefficients:"
sprintf("%.20e", coef(mdl_age)) # More precision for paper
## [1] "1.55398886995651119847e+00" "4.07808994844963593304e-07"
## [3] "-8.81767707692974442345e-05" "7.43693932415256248047e-03"
## [5] "-3.16106532728205047444e-01"
Generic stuff (applies to all models)
### Add useful columns to features and apply row filters used for all models
# Joins race/sex/name (unnested from data_before$people), features_on and
# outcomes onto features by (person_id, screening_date), applies Filters 1
# and 3, then derives the age-adjusted target:
#   p_recid_raw_noage = violence raw score - age polynomial prediction.
features_filt <- features %>%
  inner_join(
    data_before %>%
      select(person_id, screening_date, people) %>%
      # Name the column explicitly: a bare unnest() is deprecated in
      # tidyr >= 1.0, while unnest(people) works on old and new versions.
      unnest(people) %>%
      select(person_id, screening_date, race, sex, name),
    by = c("person_id", "screening_date")
  ) %>%
  inner_join(features_on, by = c("person_id", "screening_date")) %>%
  inner_join(outcomes, by = c("person_id", "screening_date")) %>%
  filter(
    `Risk of Recidivism_decile_score` != -1,
    `Risk of Violence_decile_score` != -1
  ) %>% # Filter 1
  filter(!is.na(current_offense_date)) %>% # Filter 3
  mutate(
    p_recid_raw = `Risk of Violence_raw_score`,
    # Evaluate the fitted age polynomial at each row's age
    age_poly = predict(
      mdl_age,
      newdata = data.frame(p_current_age = p_current_age)
    ),
    p_recid_raw_noage = p_recid_raw - age_poly
  )
## Set parameters (each combination will be run)
# xgboost grid: eta x gamma x max_depth x min_child_weight = 16 combinations
param <- list(
  objective = "reg:linear",
  eval_metric = "rmse",
  eta = c(0.05, 0.1),
  gamma = c(0.5, 1),
  max_depth = c(2, 5),
  min_child_weight = c(5, 10),
  subsample = 1,
  colsample_bytree = 1
)

# svm grid: cost x epsilon x gamma_scale = 27 combinations
param_svm <- list(
  type = "eps-regression",
  cost = c(0.5, 1, 2),
  epsilon = c(0.5, 1, 1.5),
  gamma_scale = c(0.5, 1, 2)
)

# One row per model group; each cell is filled in with that model's RMSE
res_rmse <- data.frame(
  Group = 1:4,
  lm = NA,
  xgb = NA,
  rf = NA,
  svm = NA
)
## Age polynomial
# Scatter of violence raw score against age, with the fitted age polynomial
# drawn as a line over a 1000-point grid between xmin and xmax.
xmin <- 18
xmax <- 70
xx <- seq(from = xmin, to = xmax, length.out = 1000)
ggplot() +
  geom_point(
    mapping = aes(x = p_current_age, y = p_recid_raw, color = "b"),
    data = features_filt,
    alpha = 0.3
  ) +
  geom_line(
    mapping = aes(
      x = xx,
      y = predict(mdl_age, newdata = data.frame(p_current_age = xx)),
      color = "a"
    )
  ) +
  theme_bw() +
  xlim(xmin, xmax) +
  xlab("Age at COMPAS screening date") +
  ylab("COMPAS violence raw") +
  theme(
    text = element_text(size = 12),
    axis.text = element_text(size = 12),
    legend.position = "none"
  )
## Warning: Removed 19 rows containing missing values (geom_point).

# Save the plot drawn just above (ggsave defaults to the last plot)
ggsave(
  filename = "Figures/age_agePoly_violent.pdf",
  width = 3.5,
  height = 2.5,
  units = "in"
)
## Warning: Removed 19 rows containing missing values (geom_point).
### Number of priors vs. COMPAS remainder
# Age-adjusted violence score plotted against the prior violent charge count
features_filt %>%
  ggplot() +
  geom_point(
    mapping = aes(x = p_charge_violent, y = p_recid_raw_noage),
    alpha = 0.3
  ) +
  theme_bw() +
  xlab("Number of prior violent charges") +
  ylab("COMPAS violence raw remainder") +
  theme(
    text = element_text(size = 12),
    axis.text = element_text(size = 12)
  )

# Save the plot drawn just above (ggsave defaults to the last plot)
ggsave(
  filename = "Figures/priors_rawScoreRemain_violent.pdf",
  width = 3.5,
  height = 3.5,
  units = "in"
)
## Total violence history items
# Cap each history count at its maximum, add the capped counts into one
# total, and plot that total against the age-adjusted violence score.
features_filt %>%
  mutate(
    p_juv_fel_count = pmin(p_juv_fel_count, 2),
    p_felprop_violarrest = pmin(p_felprop_violarrest, 5),
    p_murder_arrest = pmin(p_murder_arrest, 3),
    p_felassault_arrest = pmin(p_felassault_arrest, 3),
    p_misdemassault_arrest = pmin(p_misdemassault_arrest, 3),
    # p_famviol_arrest = pmin(p_famviol_arrest, 3),
    p_sex_arrest = pmin(p_sex_arrest, 3),
    p_weapons_arrest = pmin(p_weapons_arrest, 3),
    # Expressions later in the same mutate() see the capped columns above
    violence_hist_total =
      p_juv_fel_count +
      p_felprop_violarrest +
      p_murder_arrest +
      p_felassault_arrest +
      p_misdemassault_arrest +
      # p_famviol_arrest +
      p_sex_arrest +
      p_weapons_arrest
  ) %>%
  ggplot() +
  geom_point(
    mapping = aes(x = violence_hist_total, y = p_recid_raw_noage),
    alpha = 0.3
  ) +
  xlab("Total Violence History items") +
  ylab("COMPAS violence raw remainder") +
  xlim(0, 15) +
  theme_bw() +
  theme(
    text = element_text(size = 12),
    axis.text = element_text(size = 12)
  )

# Save the plot drawn just above (ggsave defaults to the last plot)
ggsave(
  filename = "Figures/violenceHist_rawScoreRemain_violent.pdf",
  width = 3.5,
  height = 3.5,
  units = "in"
)
Replicating ProPublica logistic regression
# Build 0/1 indicator columns for the logistic regression.
# Note: n_priors is computed here but the model below uses p_charge instead.
propub <- features_filt %>%
  filter(current_offense_date <= current_offense_date_limit) %>% # Only people with valid recidivism values
  mutate(
    age_low = as.numeric(p_current_age < 25),
    age_high = as.numeric(p_current_age > 45),
    female = as.numeric(sex == "Female"),
    n_priors = p_felony_count_person + p_misdem_count_person,
    # Medium and High risk scores get +1 label
    compas_high = as.numeric(`Risk of Violence_decile_score` >= 5),
    # Base level is Caucasian, as in ProPublica analysis
    race = relevel(factor(race), ref = "Caucasian")
  )

# Logistic regression of the medium/high violence label on demographics,
# charge information and observed violent recidivism.
mdl_glm <- glm(
  compas_high ~
    female +
    age_high +
    age_low +
    as.factor(race) +
    p_charge +
    is_misdem +
    recid_violent,
  family = binomial(link = "logit"),
  data = propub
)
summary(mdl_glm)
##
## Call:
## glm(formula = compas_high ~ female + age_high + age_low + as.factor(race) +
## p_charge + is_misdem + recid_violent, family = binomial(link = "logit"),
## data = propub)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.6118 -0.6001 -0.3213 0.7395 2.9655
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.368882 0.094675 -25.021 < 2e-16 ***
## female -0.651197 0.099532 -6.543 6.05e-11 ***
## age_high -1.727458 0.174615 -9.893 < 2e-16 ***
## age_low 2.593419 0.079247 32.726 < 2e-16 ***
## as.factor(race)African-American 0.745976 0.083088 8.978 < 2e-16 ***
## as.factor(race)Asian -0.765381 0.678659 -1.128 0.259
## as.factor(race)Hispanic 0.005838 0.146006 0.040 0.968
## as.factor(race)Native American 0.503070 0.740727 0.679 0.497
## as.factor(race)Other -0.039456 0.170241 -0.232 0.817
## p_charge 0.074006 0.004375 16.915 < 2e-16 ***
## is_misdem -0.362223 0.077149 -4.695 2.67e-06 ***
## recid_violent 0.704532 0.093094 7.568 3.79e-14 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 7378.0 on 5758 degrees of freedom
## Residual deviance: 4934.2 on 5747 degrees of freedom
## AIC: 4958.2
##
## Number of Fisher Scoring iterations: 6
Group 1 models: predicting (raw - age polynomial) without using age variables or race
### Create group 1 training data
## Select features and cap count features at fixed maxima
# Group 1 uses neither current age nor race as a predictor.
train <- features_filt %>%
  transmute(
    # p_current_age,
    p_age_first_offense,
    p_juv_fel_count = pmin(p_juv_fel_count, 2),
    p_felprop_violarrest = pmin(p_felprop_violarrest, 5),
    p_murder_arrest = pmin(p_murder_arrest, 3),
    p_felassault_arrest = pmin(p_felassault_arrest, 3),
    p_misdemassault_arrest = pmin(p_misdemassault_arrest, 3),
    # p_famviol_arrest = pmin(p_famviol_arrest, 3),
    p_sex_arrest = pmin(p_sex_arrest, 3),
    p_weapons_arrest = pmin(p_weapons_arrest, 3),
    p_n_on_probation = pmin(p_n_on_probation, 5),
    p_current_on_probation = pmin(p_current_on_probation, 5),
    p_prob_revoke = pmin(p_prob_revoke, 5),
    p_recid_raw_noage
  )

## Format for xgboost
# Predictors: every column except the target. Label: the target column,
# kept as a one-column matrix.
train_xgb <- xgb.DMatrix(
  data = as.matrix(select(train, -p_recid_raw_noage)),
  label = as.matrix(select(train, p_recid_raw_noage))
)
Model 1: Linear model
# Linear regression of the age-adjusted score on every other column of train
mdl_lm <- lm(p_recid_raw_noage ~ ., data = train)
summary(mdl_lm)
##
## Call:
## lm(formula = p_recid_raw_noage ~ ., data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.9576 -0.3445 -0.1178 0.2532 3.7943
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.8451607 0.0153243 55.152 < 2e-16 ***
## p_age_first_offense -0.0065319 0.0004888 -13.362 < 2e-16 ***
## p_juv_fel_count 0.1306356 0.0196060 6.663 2.84e-11 ***
## p_felprop_violarrest 0.1124162 0.0061483 18.284 < 2e-16 ***
## p_murder_arrest 0.1027832 0.0481811 2.133 0.0329 *
## p_felassault_arrest 0.1810567 0.0114713 15.783 < 2e-16 ***
## p_misdemassault_arrest 0.1669203 0.0107051 15.593 < 2e-16 ***
## p_sex_arrest 0.1747182 0.0407972 4.283 1.87e-05 ***
## p_weapons_arrest 0.1033170 0.0158291 6.527 7.07e-11 ***
## p_n_on_probation 0.0787288 0.0051618 15.252 < 2e-16 ***
## p_current_on_probation 0.1522874 0.0260538 5.845 5.24e-09 ***
## p_prob_revoke 0.1443481 0.0195092 7.399 1.50e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5048 on 9030 degrees of freedom
## Multiple R-squared: 0.32, Adjusted R-squared: 0.3191
## F-statistic: 386.3 on 11 and 9030 DF, p-value: < 2.2e-16
# Record the linear model's RMSE on the training rows for group 1
res_rmse$lm[res_rmse$Group == 1] <- rmse(
  predict(mdl_lm, newdata = train),
  train$p_recid_raw_noage
) # ADJUST GROUP
Model 2: xgboost
# Fix the RNG seed, then fit xgboost over the parameter grid in param
set.seed(46)
mdl_xgb <- fit_xgboost(train_xgb, param)
## Training on 16 sets of parameters.
## 15
## objective "reg:linear"
## eval_metric "rmse"
## eta "0.05"
## gamma "1"
## max_depth "5"
## min_child_weight "10"
## subsample "1"
## colsample_bytree "1"
### xgboost plot
# Predictions on the training data against the actual target, drawn on a
# square plot with shared axis limits and a slope-1 reference line.
pred <- predict(mdl_xgb, newdata = train_xgb)
actual <- train$p_recid_raw_noage
res_rmse$xgb[res_rmse$Group == 1] <- rmse(pred, actual) # ADJUST GROUP

# Common limits so both axes cover predictions and actual values
axis_min <- min(pred, actual)
axis_max <- max(pred, actual)

data.frame(xgboost = pred, compas = actual) %>%
  ggplot() +
  geom_point(aes(x = compas, y = xgboost), alpha = 0.3) +
  geom_abline(slope = 1, color = "red") +
  xlim(axis_min, axis_max) +
  ylim(axis_min, axis_max) +
  coord_fixed() +
  theme_bw() +
  xlab("COMPAS violence raw remainder") +
  ylab("XGBoost prediction") +
  theme(
    text = element_text(size = 14),
    axis.text = element_text(size = 14)
  )

### Variable importance
# Plot the feature importance table computed from the fitted xgboost model
xgb.plot.importance(
  importance_matrix = xgb.importance(model = mdl_xgb)
)

Model 3: random forest
# Fix the RNG seed, fit a random forest with package defaults, and record
# its RMSE for group 1.
# NOTE(review): per the randomForest docs, `predicted` holds out-of-bag
# predictions, whereas the other models here are scored on in-sample
# predictions - confirm the comparison is intended.
set.seed(55656)
mdl_rf <- randomForest(
  formula = p_recid_raw_noage ~ .,
  data = train
)
res_rmse$rf[res_rmse$Group == 1] <- rmse(
  mdl_rf$predicted,
  train$p_recid_raw_noage
) # ADJUST GROUP
Model 4: SVM
# Fit the SVM regression over the parameter grid in param_svm
mdl_svm <- fit_svm(p_recid_raw_noage ~ ., train, param_svm)
## Training on 27 sets of parameters.
## Warning in cret$cresults * scale.factor: Recycling array of length 1 in vector-array arithmetic is deprecated.
## Use c() or as.vector() instead.
## Warning in cret$cresults * scale.factor: Recycling array of length 1 in vector-array arithmetic is deprecated.
## Use c() or as.vector() instead.
## Warning in cret$cresults * scale.factor: Recycling array of length 1 in vector-array arithmetic is deprecated.
## Use c() or as.vector() instead.
## Warning in cret$cresults * scale.factor: Recycling array of length 1 in vector-array arithmetic is deprecated.
## Use c() or as.vector() instead.
## Warning in cret$cresults * scale.factor: Recycling array of length 1 in vector-array arithmetic is deprecated.
## Use c() or as.vector() instead.
## Warning in cret$cresults * scale.factor: Recycling array of length 1 in vector-array arithmetic is deprecated.
## Use c() or as.vector() instead.
## Warning in cret$cresults * scale.factor: Recycling array of length 1 in vector-array arithmetic is deprecated.
## Use c() or as.vector() instead.
## Warning in cret$cresults * scale.factor: Recycling array of length 1 in vector-array arithmetic is deprecated.
## Use c() or as.vector() instead.
## Warning in cret$cresults * scale.factor: Recycling array of length 1 in vector-array arithmetic is deprecated.
## Use c() or as.vector() instead.
## Warning in cret$cresults * scale.factor: Recycling array of length 1 in vector-array arithmetic is deprecated.
## Use c() or as.vector() instead.
## Warning in cret$cresults * scale.factor: Recycling array of length 1 in vector-array arithmetic is deprecated.
## Use c() or as.vector() instead.
## Warning in cret$cresults * scale.factor: Recycling array of length 1 in vector-array arithmetic is deprecated.
## Use c() or as.vector() instead.
## Warning in cret$cresults * scale.factor: Recycling array of length 1 in vector-array arithmetic is deprecated.
## Use c() or as.vector() instead.
## Warning in cret$cresults * scale.factor: Recycling array of length 1 in vector-array arithmetic is deprecated.
## Use c() or as.vector() instead.
## Warning in cret$cresults * scale.factor: Recycling array of length 1 in vector-array arithmetic is deprecated.
## Use c() or as.vector() instead.
## Warning in cret$cresults * scale.factor: Recycling array of length 1 in vector-array arithmetic is deprecated.
## Use c() or as.vector() instead.
## Warning in cret$cresults * scale.factor: Recycling array of length 1 in vector-array arithmetic is deprecated.
## Use c() or as.vector() instead.
## Warning in cret$cresults * scale.factor: Recycling array of length 1 in vector-array arithmetic is deprecated.
## Use c() or as.vector() instead.
## Warning in cret$cresults * scale.factor: Recycling array of length 1 in vector-array arithmetic is deprecated.
## Use c() or as.vector() instead.
## Warning in cret$cresults * scale.factor: Recycling array of length 1 in vector-array arithmetic is deprecated.
## Use c() or as.vector() instead.
## Warning in cret$cresults * scale.factor: Recycling array of length 1 in vector-array arithmetic is deprecated.
## Use c() or as.vector() instead.
## Warning in cret$cresults * scale.factor: Recycling array of length 1 in vector-array arithmetic is deprecated.
## Use c() or as.vector() instead.
## Warning in cret$cresults * scale.factor: Recycling array of length 1 in vector-array arithmetic is deprecated.
## Use c() or as.vector() instead.
## Warning in cret$cresults * scale.factor: Recycling array of length 1 in vector-array arithmetic is deprecated.
## Use c() or as.vector() instead.
## Warning in cret$cresults * scale.factor: Recycling array of length 1 in vector-array arithmetic is deprecated.
## Use c() or as.vector() instead.
## Warning in cret$cresults * scale.factor: Recycling array of length 1 in vector-array arithmetic is deprecated.
## Use c() or as.vector() instead.
## Warning in cret$cresults * scale.factor: Recycling array of length 1 in vector-array arithmetic is deprecated.
## Use c() or as.vector() instead.
## [1] "Best parameters:"
## 19
## type "eps-regression"
## cost "0.5"
## epsilon "0.5"
## gamma_scale "2"
## gamma "0.1666667"
## Warning in cret$cresults * scale.factor: Recycling array of length 1 in vector-array arithmetic is deprecated.
## Use c() or as.vector() instead.
# Record the SVM's RMSE from its fitted values for group 1
res_rmse$svm[res_rmse$Group == 1] <- rmse(
  mdl_svm$fitted,
  train$p_recid_raw_noage
) # ADJUST GROUP
Cleanup
# Remove every per-group object before the next group is built. mdl_svm is
# included so no group 1 model can be reused by mistake; the next group
# refits it before use.
rm(train, train_xgb, mdl_lm, mdl_xgb, mdl_rf, mdl_svm)
Group 2 models: predicting (raw - age polynomial) without using age variables but with race
### Create group 2 training data
## Select features and cap count features at fixed maxima
# Group 2 adds race indicator columns; current age is still excluded.
train <- features_filt %>%
  transmute(
    # p_current_age,
    p_age_first_offense,
    p_juv_fel_count = pmin(p_juv_fel_count, 2),
    p_felprop_violarrest = pmin(p_felprop_violarrest, 5),
    p_murder_arrest = pmin(p_murder_arrest, 3),
    p_felassault_arrest = pmin(p_felassault_arrest, 3),
    p_misdemassault_arrest = pmin(p_misdemassault_arrest, 3),
    # p_famviol_arrest = pmin(p_famviol_arrest, 3),
    p_sex_arrest = pmin(p_sex_arrest, 3),
    p_weapons_arrest = pmin(p_weapons_arrest, 3),
    p_n_on_probation = pmin(p_n_on_probation, 5),
    p_current_on_probation = pmin(p_current_on_probation, 5),
    p_prob_revoke = pmin(p_prob_revoke, 5),
    # 0/1 race indicators; race == "Other" is the baseline
    race_black = as.numeric(race == "African-American"),
    race_white = as.numeric(race == "Caucasian"),
    race_hispanic = as.numeric(race == "Hispanic"),
    race_asian = as.numeric(race == "Asian"),
    race_native = as.numeric(race == "Native American"),
    p_recid_raw_noage
  )

## Format for xgboost
# Predictors: every column except the target. Label: the target column,
# kept as a one-column matrix.
train_xgb <- xgb.DMatrix(
  data = as.matrix(select(train, -p_recid_raw_noage)),
  label = as.matrix(select(train, p_recid_raw_noage))
)
Model 1: Linear model
# Linear regression of the age-adjusted score on every other column of train
mdl_lm <- lm(p_recid_raw_noage ~ ., data = train)
summary(mdl_lm)
##
## Call:
## lm(formula = p_recid_raw_noage ~ ., data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.9759 -0.3321 -0.1098 0.2460 3.6383
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.6137104 0.0260561 23.553 < 2e-16 ***
## p_age_first_offense -0.0044461 0.0004968 -8.949 < 2e-16 ***
## p_juv_fel_count 0.1212941 0.0192844 6.290 3.33e-10 ***
## p_felprop_violarrest 0.1088356 0.0060435 18.009 < 2e-16 ***
## p_murder_arrest 0.1060919 0.0473390 2.241 0.025 *
## p_felassault_arrest 0.1767698 0.0112734 15.680 < 2e-16 ***
## p_misdemassault_arrest 0.1668764 0.0105170 15.867 < 2e-16 ***
## p_sex_arrest 0.1659144 0.0400788 4.140 3.51e-05 ***
## p_weapons_arrest 0.0929469 0.0155661 5.971 2.45e-09 ***
## p_n_on_probation 0.0736827 0.0050787 14.508 < 2e-16 ***
## p_current_on_probation 0.1424988 0.0256041 5.565 2.69e-08 ***
## p_prob_revoke 0.1351758 0.0191733 7.050 1.92e-12 ***
## race_black 0.2747839 0.0225159 12.204 < 2e-16 ***
## race_white 0.1209223 0.0227808 5.308 1.13e-07 ***
## race_hispanic 0.0267705 0.0273158 0.980 0.327
## race_asian -0.0244794 0.0753349 -0.325 0.745
## race_native 0.1538104 0.0962585 1.598 0.110
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4958 on 9025 degrees of freedom
## Multiple R-squared: 0.3442, Adjusted R-squared: 0.3431
## F-statistic: 296.1 on 16 and 9025 DF, p-value: < 2.2e-16
# Record the linear model's RMSE on the training rows for group 2
res_rmse$lm[res_rmse$Group == 2] <- rmse(
  predict(mdl_lm, newdata = train),
  train$p_recid_raw_noage
) # ADJUST GROUP
Model 2: xgboost
# Fix the RNG seed, then fit xgboost over the parameter grid in param
set.seed(390)
mdl_xgb <- fit_xgboost(train_xgb, param)
## Training on 16 sets of parameters.
## 14
## objective "reg:linear"
## eval_metric "rmse"
## eta "0.1"
## gamma "0.5"
## max_depth "5"
## min_child_weight "10"
## subsample "1"
## colsample_bytree "1"
### xgboost plot
# Predictions on the training data against the actual target, drawn on a
# square plot with shared axis limits and a slope-1 reference line.
pred <- predict(mdl_xgb, newdata = train_xgb)
actual <- train$p_recid_raw_noage
res_rmse$xgb[res_rmse$Group == 2] <- rmse(pred, actual) # ADJUST GROUP

# Common limits so both axes cover predictions and actual values
axis_min <- min(pred, actual)
axis_max <- max(pred, actual)

data.frame(xgboost = pred, compas = actual) %>%
  ggplot() +
  geom_point(aes(x = compas, y = xgboost), alpha = 0.3) +
  geom_abline(slope = 1, color = "red") +
  xlim(axis_min, axis_max) +
  ylim(axis_min, axis_max) +
  coord_fixed() +
  theme_bw() +
  xlab("COMPAS violence raw remainder") +
  ylab("XGBoost prediction") +
  theme(
    text = element_text(size = 14),
    axis.text = element_text(size = 14)
  )

### Variable importance
# Plot the feature importance table computed from the fitted xgboost model
xgb.plot.importance(
  importance_matrix = xgb.importance(model = mdl_xgb)
)

Model 3: random forest
# Fix the RNG seed, fit a random forest with package defaults, and record
# its RMSE for group 2.
# NOTE(review): per the randomForest docs, `predicted` holds out-of-bag
# predictions, whereas the other models here are scored on in-sample
# predictions - confirm the comparison is intended.
set.seed(2728)
mdl_rf <- randomForest(
  formula = p_recid_raw_noage ~ .,
  data = train
)
res_rmse$rf[res_rmse$Group == 2] <- rmse(
  mdl_rf$predicted,
  train$p_recid_raw_noage
) # ADJUST GROUP
Model 4: SVM
# Fit the SVM regression over the parameter grid in param_svm
mdl_svm <- fit_svm(p_recid_raw_noage ~ ., train, param_svm)
## Training on 27 sets of parameters.
## Warning in cret$cresults * scale.factor: Recycling array of length 1 in vector-array arithmetic is deprecated.
## Use c() or as.vector() instead.
## Warning in cret$cresults * scale.factor: Recycling array of length 1 in vector-array arithmetic is deprecated.
## Use c() or as.vector() instead.
## Warning in cret$cresults * scale.factor: Recycling array of length 1 in vector-array arithmetic is deprecated.
## Use c() or as.vector() instead.
## Warning in cret$cresults * scale.factor: Recycling array of length 1 in vector-array arithmetic is deprecated.
## Use c() or as.vector() instead.
## Warning in cret$cresults * scale.factor: Recycling array of length 1 in vector-array arithmetic is deprecated.
## Use c() or as.vector() instead.
## Warning in cret$cresults * scale.factor: Recycling array of length 1 in vector-array arithmetic is deprecated.
## Use c() or as.vector() instead.
## Warning in cret$cresults * scale.factor: Recycling array of length 1 in vector-array arithmetic is deprecated.
## Use c() or as.vector() instead.
## Warning in cret$cresults * scale.factor: Recycling array of length 1 in vector-array arithmetic is deprecated.
## Use c() or as.vector() instead.
## Warning in cret$cresults * scale.factor: Recycling array of length 1 in vector-array arithmetic is deprecated.
## Use c() or as.vector() instead.
## Warning in cret$cresults * scale.factor: Recycling array of length 1 in vector-array arithmetic is deprecated.
## Use c() or as.vector() instead.
## Warning in cret$cresults * scale.factor: Recycling array of length 1 in vector-array arithmetic is deprecated.
## Use c() or as.vector() instead.
## Warning in cret$cresults * scale.factor: Recycling array of length 1 in vector-array arithmetic is deprecated.
## Use c() or as.vector() instead.
## Warning in cret$cresults * scale.factor: Recycling array of length 1 in vector-array arithmetic is deprecated.
## Use c() or as.vector() instead.
## Warning in cret$cresults * scale.factor: Recycling array of length 1 in vector-array arithmetic is deprecated.
## Use c() or as.vector() instead.
## Warning in cret$cresults * scale.factor: Recycling array of length 1 in vector-array arithmetic is deprecated.
## Use c() or as.vector() instead.
## Warning in cret$cresults * scale.factor: Recycling array of length 1 in vector-array arithmetic is deprecated.
## Use c() or as.vector() instead.
## Warning in cret$cresults * scale.factor: Recycling array of length 1 in vector-array arithmetic is deprecated.
## Use c() or as.vector() instead.
## Warning in cret$cresults * scale.factor: Recycling array of length 1 in vector-array arithmetic is deprecated.
## Use c() or as.vector() instead.
## Warning in cret$cresults * scale.factor: Recycling array of length 1 in vector-array arithmetic is deprecated.
## Use c() or as.vector() instead.
## Warning in cret$cresults * scale.factor: Recycling array of length 1 in vector-array arithmetic is deprecated.
## Use c() or as.vector() instead.
## Warning in cret$cresults * scale.factor: Recycling array of length 1 in vector-array arithmetic is deprecated.
## Use c() or as.vector() instead.
## Warning in cret$cresults * scale.factor: Recycling array of length 1 in vector-array arithmetic is deprecated.
## Use c() or as.vector() instead.
## Warning in cret$cresults * scale.factor: Recycling array of length 1 in vector-array arithmetic is deprecated.
## Use c() or as.vector() instead.
## Warning in cret$cresults * scale.factor: Recycling array of length 1 in vector-array arithmetic is deprecated.
## Use c() or as.vector() instead.
## Warning in cret$cresults * scale.factor: Recycling array of length 1 in vector-array arithmetic is deprecated.
## Use c() or as.vector() instead.
## Warning in cret$cresults * scale.factor: Recycling array of length 1 in vector-array arithmetic is deprecated.
## Use c() or as.vector() instead.
## Warning in cret$cresults * scale.factor: Recycling array of length 1 in vector-array arithmetic is deprecated.
## Use c() or as.vector() instead.
## [1] "Best parameters:"
## 10
## type "eps-regression"
## cost "0.5"
## epsilon "0.5"
## gamma_scale "1"
## gamma "0.05882353"
## Warning in cret$cresults * scale.factor: Recycling array of length 1 in vector-array arithmetic is deprecated.
## Use c() or as.vector() instead.
# Record the SVM's RMSE from its fitted values for group 2
res_rmse$svm[res_rmse$Group == 2] <- rmse(
  mdl_svm$fitted,
  train$p_recid_raw_noage
) # ADJUST GROUP
Cleanup
# Remove every per-group object before the next group is built. mdl_svm is
# included so no group 2 model can be reused by mistake; the next group
# refits it before use.
rm(train, train_xgb, mdl_lm, mdl_xgb, mdl_rf, mdl_svm)
Group 3 models: predicting (raw - age polynomial) without using race but with age variables
### Create group 3 training data
## Select features and cap count features at fixed maxima
# Group 3 includes current age as a predictor; race is excluded.
train <- features_filt %>%
  transmute(
    p_current_age,
    p_age_first_offense,
    p_juv_fel_count = pmin(p_juv_fel_count, 2),
    p_felprop_violarrest = pmin(p_felprop_violarrest, 5),
    p_murder_arrest = pmin(p_murder_arrest, 3),
    p_felassault_arrest = pmin(p_felassault_arrest, 3),
    p_misdemassault_arrest = pmin(p_misdemassault_arrest, 3),
    # p_famviol_arrest = pmin(p_famviol_arrest, 3),
    p_sex_arrest = pmin(p_sex_arrest, 3),
    p_weapons_arrest = pmin(p_weapons_arrest, 3),
    p_n_on_probation = pmin(p_n_on_probation, 5),
    p_current_on_probation = pmin(p_current_on_probation, 5),
    p_prob_revoke = pmin(p_prob_revoke, 5),
    p_recid_raw_noage
  )

## Format for xgboost
# Predictors: every column except the target. Label: the target column,
# kept as a one-column matrix.
train_xgb <- xgb.DMatrix(
  data = as.matrix(select(train, -p_recid_raw_noage)),
  label = as.matrix(select(train, p_recid_raw_noage))
)
Model 1: Linear model
# Linear regression of the age-adjusted score on every other column of train
mdl_lm <- lm(p_recid_raw_noage ~ ., data = train)
summary(mdl_lm)
##
## Call:
## lm(formula = p_recid_raw_noage ~ ., data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.8853 -0.3396 -0.1105 0.2422 3.8069
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.7750803 0.0158841 48.796 < 2e-16 ***
## p_current_age 0.0140701 0.0009606 14.646 < 2e-16 ***
## p_age_first_offense -0.0195380 0.0010109 -19.327 < 2e-16 ***
## p_juv_fel_count 0.1216902 0.0193879 6.277 3.62e-10 ***
## p_felprop_violarrest 0.0902790 0.0062621 14.417 < 2e-16 ***
## p_murder_arrest 0.0807082 0.0476452 1.694 0.09031 .
## p_felassault_arrest 0.1480263 0.0115601 12.805 < 2e-16 ***
## p_misdemassault_arrest 0.1516864 0.0106318 14.267 < 2e-16 ***
## p_sex_arrest 0.1227168 0.0404793 3.032 0.00244 **
## p_weapons_arrest 0.0757266 0.0157582 4.806 1.57e-06 ***
## p_n_on_probation 0.0694142 0.0051413 13.501 < 2e-16 ***
## p_current_on_probation 0.1673132 0.0257716 6.492 8.91e-11 ***
## p_prob_revoke 0.0562168 0.0201996 2.783 0.00540 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4989 on 9029 degrees of freedom
## Multiple R-squared: 0.3358, Adjusted R-squared: 0.3349
## F-statistic: 380.3 on 12 and 9029 DF, p-value: < 2.2e-16
# Record the linear model's RMSE on the training rows for group 3
res_rmse$lm[res_rmse$Group == 3] <- rmse(
  predict(mdl_lm, newdata = train),
  train$p_recid_raw_noage
) # ADJUST GROUP
Model 2: xgboost
# Fix the RNG seed, then fit xgboost over the parameter grid in param
set.seed(34)
mdl_xgb <- fit_xgboost(train_xgb, param)
## Training on 16 sets of parameters.
## 5
## objective "reg:linear"
## eval_metric "rmse"
## eta "0.05"
## gamma "0.5"
## max_depth "5"
## min_child_weight "5"
## subsample "1"
## colsample_bytree "1"
### xgboost plot
# Predictions on the training data against the actual target, drawn on a
# square plot with shared axis limits and a slope-1 reference line.
pred <- predict(mdl_xgb, newdata = train_xgb)
actual <- train$p_recid_raw_noage
res_rmse$xgb[res_rmse$Group == 3] <- rmse(pred, actual) # ADJUST GROUP

# Common limits so both axes cover predictions and actual values
axis_min <- min(pred, actual)
axis_max <- max(pred, actual)

data.frame(xgboost = pred, compas = actual) %>%
  ggplot() +
  geom_point(aes(x = compas, y = xgboost), alpha = 0.3) +
  geom_abline(slope = 1, color = "red") +
  xlim(axis_min, axis_max) +
  ylim(axis_min, axis_max) +
  coord_fixed() +
  theme_bw() +
  xlab("COMPAS violence raw remainder") +
  ylab("XGBoost prediction") +
  theme(
    text = element_text(size = 14),
    axis.text = element_text(size = 14)
  )

### Variable importance
# Plot the feature importance table computed from the fitted xgboost model
xgb.plot.importance(
  importance_matrix = xgb.importance(model = mdl_xgb)
)

Model 3: random forest
# Fix the RNG seed, fit a random forest with package defaults, and record
# its RMSE for group 3.
# NOTE(review): per the randomForest docs, `predicted` holds out-of-bag
# predictions, whereas the other models here are scored on in-sample
# predictions - confirm the comparison is intended.
set.seed(7872)
mdl_rf <- randomForest(
  formula = p_recid_raw_noage ~ .,
  data = train
)
res_rmse$rf[res_rmse$Group == 3] <- rmse(
  mdl_rf$predicted,
  train$p_recid_raw_noage
) # ADJUST GROUP
Model 4: SVM
# Fit the SVM regression over the parameter grid in param_svm
mdl_svm <- fit_svm(p_recid_raw_noage ~ ., train, param_svm)
## Training on 27 sets of parameters.
## Warning in cret$cresults * scale.factor: Recycling array of length 1 in vector-array arithmetic is deprecated.
## Use c() or as.vector() instead.
## Warning in cret$cresults * scale.factor: Recycling array of length 1 in vector-array arithmetic is deprecated.
## Use c() or as.vector() instead.
## Warning in cret$cresults * scale.factor: Recycling array of length 1 in vector-array arithmetic is deprecated.
## Use c() or as.vector() instead.
## Warning in cret$cresults * scale.factor: Recycling array of length 1 in vector-array arithmetic is deprecated.
## Use c() or as.vector() instead.
## Warning in cret$cresults * scale.factor: Recycling array of length 1 in vector-array arithmetic is deprecated.
## Use c() or as.vector() instead.
## Warning in cret$cresults * scale.factor: Recycling array of length 1 in vector-array arithmetic is deprecated.
## Use c() or as.vector() instead.
## Warning in cret$cresults * scale.factor: Recycling array of length 1 in vector-array arithmetic is deprecated.
## Use c() or as.vector() instead.
## Warning in cret$cresults * scale.factor: Recycling array of length 1 in vector-array arithmetic is deprecated.
## Use c() or as.vector() instead.
## Warning in cret$cresults * scale.factor: Recycling array of length 1 in vector-array arithmetic is deprecated.
## Use c() or as.vector() instead.
## Warning in cret$cresults * scale.factor: Recycling array of length 1 in vector-array arithmetic is deprecated.
## Use c() or as.vector() instead.
## Warning in cret$cresults * scale.factor: Recycling array of length 1 in vector-array arithmetic is deprecated.
## Use c() or as.vector() instead.
## Warning in cret$cresults * scale.factor: Recycling array of length 1 in vector-array arithmetic is deprecated.
## Use c() or as.vector() instead.
## Warning in cret$cresults * scale.factor: Recycling array of length 1 in vector-array arithmetic is deprecated.
## Use c() or as.vector() instead.
## Warning in cret$cresults * scale.factor: Recycling array of length 1 in vector-array arithmetic is deprecated.
## Use c() or as.vector() instead.
## Warning in cret$cresults * scale.factor: Recycling array of length 1 in vector-array arithmetic is deprecated.
## Use c() or as.vector() instead.
## Warning in cret$cresults * scale.factor: Recycling array of length 1 in vector-array arithmetic is deprecated.
## Use c() or as.vector() instead.
## Warning in cret$cresults * scale.factor: Recycling array of length 1 in vector-array arithmetic is deprecated.
## Use c() or as.vector() instead.
## Warning in cret$cresults * scale.factor: Recycling array of length 1 in vector-array arithmetic is deprecated.
## Use c() or as.vector() instead.
## Warning in cret$cresults * scale.factor: Recycling array of length 1 in vector-array arithmetic is deprecated.
## Use c() or as.vector() instead.
## Warning in cret$cresults * scale.factor: Recycling array of length 1 in vector-array arithmetic is deprecated.
## Use c() or as.vector() instead.
## Warning in cret$cresults * scale.factor: Recycling array of length 1 in vector-array arithmetic is deprecated.
## Use c() or as.vector() instead.
## Warning in cret$cresults * scale.factor: Recycling array of length 1 in vector-array arithmetic is deprecated.
## Use c() or as.vector() instead.
## Warning in cret$cresults * scale.factor: Recycling array of length 1 in vector-array arithmetic is deprecated.
## Use c() or as.vector() instead.
## Warning in cret$cresults * scale.factor: Recycling array of length 1 in vector-array arithmetic is deprecated.
## Use c() or as.vector() instead.
## Warning in cret$cresults * scale.factor: Recycling array of length 1 in vector-array arithmetic is deprecated.
## Use c() or as.vector() instead.
## Warning in cret$cresults * scale.factor: Recycling array of length 1 in vector-array arithmetic is deprecated.
## Use c() or as.vector() instead.
## Warning in cret$cresults * scale.factor: Recycling array of length 1 in vector-array arithmetic is deprecated.
## Use c() or as.vector() instead.
## [1] "Best parameters:"
## 1
## type "eps-regression"
## cost "0.5"
## epsilon "0.5"
## gamma_scale "0.5"
## gamma "0.03846154"
## Warning in cret$cresults * scale.factor: Recycling array of length 1 in vector-array arithmetic is deprecated.
## Use c() or as.vector() instead.
# Record the SVM's RMSE from its fitted values for group 3
res_rmse$svm[res_rmse$Group == 3] <- rmse(
  mdl_svm$fitted,
  train$p_recid_raw_noage
) # ADJUST GROUP
Cleanup
# Remove the group 3 training data and fitted lm / xgboost / random forest
# models before the next group is built (mdl_svm is left in place).
rm(list = c("train", "train_xgb", "mdl_lm", "mdl_xgb", "mdl_rf"))
Group 4 models: predicting (raw - age polynomial) using age variables and race
### Create group 4 training data
## Select features and cap count features at fixed maxima
# Group 4 uses both current age and the race indicator columns.
train <- features_filt %>%
  transmute(
    p_current_age,
    p_age_first_offense,
    p_juv_fel_count = pmin(p_juv_fel_count, 2),
    p_felprop_violarrest = pmin(p_felprop_violarrest, 5),
    p_murder_arrest = pmin(p_murder_arrest, 3),
    p_felassault_arrest = pmin(p_felassault_arrest, 3),
    p_misdemassault_arrest = pmin(p_misdemassault_arrest, 3),
    # p_famviol_arrest = pmin(p_famviol_arrest, 3),
    p_sex_arrest = pmin(p_sex_arrest, 3),
    p_weapons_arrest = pmin(p_weapons_arrest, 3),
    p_n_on_probation = pmin(p_n_on_probation, 5),
    p_current_on_probation = pmin(p_current_on_probation, 5),
    p_prob_revoke = pmin(p_prob_revoke, 5),
    # 0/1 race indicators; race == "Other" is the baseline
    race_black = as.numeric(race == "African-American"),
    race_white = as.numeric(race == "Caucasian"),
    race_hispanic = as.numeric(race == "Hispanic"),
    race_asian = as.numeric(race == "Asian"),
    race_native = as.numeric(race == "Native American"),
    p_recid_raw_noage
  )

## Format for xgboost
# Predictors: every column except the target. Label: the target column,
# kept as a one-column matrix.
train_xgb <- xgb.DMatrix(
  data = as.matrix(select(train, -p_recid_raw_noage)),
  label = as.matrix(select(train, p_recid_raw_noage))
)
Model 1: Linear model
# Linear regression of the target on every other column of train,
# followed by the coefficient table.
mdl_lm <- lm(formula = p_recid_raw_noage ~ ., data = train)
summary(mdl_lm)
##
## Call:
## lm(formula = p_recid_raw_noage ~ ., data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.9064 -0.3264 -0.1039 0.2364 3.6516
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.5544869 0.0260945 21.249 < 2e-16 ***
## p_current_age 0.0135686 0.0009462 14.340 < 2e-16 ***
## p_age_first_offense -0.0169983 0.0010038 -16.934 < 2e-16 ***
## p_juv_fel_count 0.1127517 0.0190788 5.910 3.55e-09 ***
## p_felprop_violarrest 0.0876690 0.0061557 14.242 < 2e-16 ***
## p_murder_arrest 0.0839132 0.0468369 1.792 0.07323 .
## p_felassault_arrest 0.1451296 0.0113640 12.771 < 2e-16 ***
## p_misdemassault_arrest 0.1521143 0.0104506 14.556 < 2e-16 ***
## p_sex_arrest 0.1160088 0.0397845 2.916 0.00356 **
## p_weapons_arrest 0.0663196 0.0155042 4.278 1.91e-05 ***
## p_n_on_probation 0.0647496 0.0050606 12.795 < 2e-16 ***
## p_current_on_probation 0.1570690 0.0253391 6.199 5.94e-10 ***
## p_prob_revoke 0.0506041 0.0198557 2.549 0.01083 *
## race_black 0.2658235 0.0222736 11.934 < 2e-16 ***
## race_white 0.1089137 0.0225424 4.831 1.38e-06 ***
## race_hispanic 0.0308834 0.0270128 1.143 0.25295
## race_asian -0.0139690 0.0744987 -0.188 0.85127
## race_native 0.1331917 0.0951964 1.399 0.16181
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4903 on 9024 degrees of freedom
## Multiple R-squared: 0.3589, Adjusted R-squared: 0.3576
## F-statistic: 297.1 on 17 and 9024 DF, p-value: < 2.2e-16
# RMSE of the linear model's predictions on train, stored in the Group 4 row.
res_rmse[res_rmse$Group == 4, ]$lm <- rmse(
  predict(mdl_lm, newdata = train),
  train$p_recid_raw_noage
) # ADJUST GROUP
Model 2: xgboost
# Fix the RNG seed before fitting; fit_xgboost() is defined elsewhere in the
# file and is given the DMatrix plus the `param` settings.
set.seed(11)
mdl_xgb <- fit_xgboost(train_xgb, param)
## Training on 16 sets of parameters.
## 6
## objective "reg:linear"
## eval_metric "rmse"
## eta "0.1"
## gamma "0.5"
## max_depth "5"
## min_child_weight "5"
## subsample "1"
## colsample_bytree "1"
### xgboost plot
# Predictions on the training matrix against the observed target
pred <- predict(mdl_xgb, newdata = train_xgb)
actual <- train$p_recid_raw_noage
res_rmse[res_rmse$Group == 4, ]$xgb <- rmse(pred, actual) # ADJUST GROUP

# Common limits so both axes cover the same range
axis_min <- min(pred, actual)
axis_max <- max(pred, actual)

ggplot(data.frame(xgboost = pred, compas = actual)) +
  geom_point(aes(x = compas, y = xgboost), alpha = .3) +
  geom_abline(slope = 1, color = "red") + # slope-1 reference line
  xlim(axis_min, axis_max) +
  ylim(axis_min, axis_max) +
  coord_fixed() +
  theme_bw() +
  labs(
    x = "COMPAS violence raw remainder",
    y = "Prediction of COMPAS violence raw remainder"
  ) +
  theme(
    text = element_text(size = 12),
    axis.text = element_text(size = 12)
  )

ggsave("Figures/rawScoreRemain_xgboost_violent.pdf", width = 4, height = 4, units = "in")
### Variable importance
# Compute the feature importance of the fitted xgboost model and plot it
xgb.plot.importance(
  importance_matrix = xgb.importance(model = mdl_xgb)
)

# (person_id, screening_date) pairs to emphasise in the scatter plot; the
# factor labels below refer to them as "In Table 5".
highlight <- data.frame(
  person_id = c(799, 1284, 1394, 1497, 1515, 1638, 3145, 3291, 5722, 6337, 6886, 7997, 8200, 8375, 8491, 10553, 10774, 11231, 11312, 11414),
  screening_date = ymd(c("2014-06-15", "2014-05-14", "2014-11-28", "2013-07-29", "2013-10-23", "2013-10-04", "2014-12-14", "2013-01-17", "2013-10-24", "2014-02-04", "2013-07-12", "2014-04-26", "2014-05-05", "2013-03-19", "2014-01-18", "2014-09-20", "2013-04-09", "2014-02-23", "2014-05-02", "2014-11-26")),
  highlight = TRUE
)

# Attach the xgboost predictions to features_filt and flag the highlighted rows.
# bind_cols() pairs rows by position, so this relies on train_xgb having the
# same row order as features_filt.
df_plot <- features_filt %>%
  bind_cols(xgboost = predict(mdl_xgb, newdata = train_xgb)) %>%
  left_join(highlight, by = c("person_id", "screening_date")) %>%
  mutate(
    # Rows with no match in `highlight` come out of the left join as NA
    highlight = factor(
      if_else(is.na(highlight), "Not in Table 5", "In Table 5"),
      levels = c("In Table 5", "Not in Table 5")
    )
  )

# Which highlighted people get a name label, grouped by the corner of the point
# the label is anchored to
person_id_text_topright <- c(8491, 8375, 1497)
# person_id_text_topright <- highlight$person_id
person_id_text_topleft <- c(5722, 11231)
person_id_text_botright <- c(799, 11312, 1284, 11414)
person_id_text_botleft <- c()

ggplot() +
  # Non-highlighted points are drawn first and semi-transparent, so the
  # highlighted points sit on top
  geom_point(
    aes(x = xgboost, y = p_recid_raw, color = highlight),
    alpha = .3,
    data = filter(df_plot, highlight == "Not in Table 5")
  ) +
  geom_point(
    aes(x = xgboost, y = p_recid_raw, color = highlight),
    data = filter(df_plot, highlight == "In Table 5")
  ) +
  theme_bw() +
  geom_text(
    aes(x = xgboost, y = p_recid_raw, label = name),
    size = 3, nudge_x = 0, nudge_y = 0, hjust = "left", vjust = "bottom",
    data = filter(df_plot, person_id %in% person_id_text_topright & highlight == "In Table 5")
  ) +
  geom_text(
    aes(x = xgboost, y = p_recid_raw, label = name),
    size = 3, nudge_x = 0, nudge_y = 0, hjust = "right", vjust = "bottom",
    data = filter(df_plot, person_id %in% person_id_text_topleft & highlight == "In Table 5")
  ) +
  geom_text(
    aes(x = xgboost, y = p_recid_raw, label = name),
    size = 3, nudge_x = 0, nudge_y = 0, hjust = "left", vjust = "top",
    data = filter(df_plot, person_id %in% person_id_text_botright & highlight == "In Table 5")
  ) +
  geom_text(
    aes(x = xgboost, y = p_recid_raw, label = name),
    size = 3, nudge_x = 0, nudge_y = 0, hjust = "right", vjust = "top",
    data = filter(df_plot, person_id %in% person_id_text_botleft & highlight == "In Table 5")
  ) +
  xlab("Prediction of COMPAS violence raw remainder") +
  ylab("COMPAS violence raw") +
  theme(
    text = element_text(size = 12),
    axis.text = element_text(size = 12),
    # legend.position = "top",
    legend.position = "none"
  ) +
  # A scale name is a string, waiver() or NULL; NULL omits the legend title
  # (element_blank() is a theme element, not a scale name)
  scale_color_discrete(name = NULL)

ggsave("Figures/xgboost_rawScore_violent.pdf", width = 4, height = 4, units = "in")
Model 3: random forest
# Fix the RNG seed, then fit a random forest of the target on all other
# columns of train with the package defaults.
set.seed(379)
mdl_rf <- randomForest(formula = p_recid_raw_noage ~ ., data = train)
# NOTE(review): this uses mdl_rf$predicted rather than predict() on train as the
# lm and xgboost entries do -- presumably out-of-bag predictions; confirm that
# the numbers are meant to be compared directly.
res_rmse[res_rmse$Group == 4, ]$rf <- rmse(
  mdl_rf$predicted,
  train$p_recid_raw_noage
) # ADJUST GROUP
Model 4: SVM
# Fit the SVM via fit_svm() (defined elsewhere in the file) on the same formula
# and training data, using the `param_svm` settings.
mdl_svm <- fit_svm(p_recid_raw_noage ~ ., train, param_svm)
## Training on 27 sets of parameters.
## Warning in cret$cresults * scale.factor: Recycling array of length 1 in vector-array arithmetic is deprecated.
## Use c() or as.vector() instead.
## Warning in cret$cresults * scale.factor: Recycling array of length 1 in vector-array arithmetic is deprecated.
## Use c() or as.vector() instead.
## Warning in cret$cresults * scale.factor: Recycling array of length 1 in vector-array arithmetic is deprecated.
## Use c() or as.vector() instead.
## Warning in cret$cresults * scale.factor: Recycling array of length 1 in vector-array arithmetic is deprecated.
## Use c() or as.vector() instead.
## Warning in cret$cresults * scale.factor: Recycling array of length 1 in vector-array arithmetic is deprecated.
## Use c() or as.vector() instead.
## Warning in cret$cresults * scale.factor: Recycling array of length 1 in vector-array arithmetic is deprecated.
## Use c() or as.vector() instead.
## Warning in cret$cresults * scale.factor: Recycling array of length 1 in vector-array arithmetic is deprecated.
## Use c() or as.vector() instead.
## Warning in cret$cresults * scale.factor: Recycling array of length 1 in vector-array arithmetic is deprecated.
## Use c() or as.vector() instead.
## Warning in cret$cresults * scale.factor: Recycling array of length 1 in vector-array arithmetic is deprecated.
## Use c() or as.vector() instead.
## Warning in cret$cresults * scale.factor: Recycling array of length 1 in vector-array arithmetic is deprecated.
## Use c() or as.vector() instead.
## Warning in cret$cresults * scale.factor: Recycling array of length 1 in vector-array arithmetic is deprecated.
## Use c() or as.vector() instead.
## Warning in cret$cresults * scale.factor: Recycling array of length 1 in vector-array arithmetic is deprecated.
## Use c() or as.vector() instead.
## Warning in cret$cresults * scale.factor: Recycling array of length 1 in vector-array arithmetic is deprecated.
## Use c() or as.vector() instead.
## Warning in cret$cresults * scale.factor: Recycling array of length 1 in vector-array arithmetic is deprecated.
## Use c() or as.vector() instead.
## Warning in cret$cresults * scale.factor: Recycling array of length 1 in vector-array arithmetic is deprecated.
## Use c() or as.vector() instead.
## Warning in cret$cresults * scale.factor: Recycling array of length 1 in vector-array arithmetic is deprecated.
## Use c() or as.vector() instead.
## Warning in cret$cresults * scale.factor: Recycling array of length 1 in vector-array arithmetic is deprecated.
## Use c() or as.vector() instead.
## Warning in cret$cresults * scale.factor: Recycling array of length 1 in vector-array arithmetic is deprecated.
## Use c() or as.vector() instead.
## Warning in cret$cresults * scale.factor: Recycling array of length 1 in vector-array arithmetic is deprecated.
## Use c() or as.vector() instead.
## Warning in cret$cresults * scale.factor: Recycling array of length 1 in vector-array arithmetic is deprecated.
## Use c() or as.vector() instead.
## Warning in cret$cresults * scale.factor: Recycling array of length 1 in vector-array arithmetic is deprecated.
## Use c() or as.vector() instead.
## Warning in cret$cresults * scale.factor: Recycling array of length 1 in vector-array arithmetic is deprecated.
## Use c() or as.vector() instead.
## Warning in cret$cresults * scale.factor: Recycling array of length 1 in vector-array arithmetic is deprecated.
## Use c() or as.vector() instead.
## Warning in cret$cresults * scale.factor: Recycling array of length 1 in vector-array arithmetic is deprecated.
## Use c() or as.vector() instead.
## Warning in cret$cresults * scale.factor: Recycling array of length 1 in vector-array arithmetic is deprecated.
## Use c() or as.vector() instead.
## Warning in cret$cresults * scale.factor: Recycling array of length 1 in vector-array arithmetic is deprecated.
## Use c() or as.vector() instead.
## Warning in cret$cresults * scale.factor: Recycling array of length 1 in vector-array arithmetic is deprecated.
## Use c() or as.vector() instead.
## [1] "Best parameters:"
## 2
## type "eps-regression"
## cost "1"
## epsilon "0.5"
## gamma_scale "0.5"
## gamma "0.02777778"
## Warning in cret$cresults * scale.factor: Recycling array of length 1 in vector-array arithmetic is deprecated.
## Use c() or as.vector() instead.
# Store the SVM's RMSE (mdl_svm$fitted vs. the target column of train) in the
# Group 4 row of the results table.
res_rmse[res_rmse$Group == 4, ]$svm <- rmse(
  mdl_svm$fitted,
  train$p_recid_raw_noage
) # ADJUST GROUP
Cleanup
# Drop this group's fitted models and training data from the workspace.
# mdl_svm is not part of this list.
rm(mdl_lm, mdl_xgb, mdl_rf, train, train_xgb)
Comparison
# Render res_rmse (one row per model group, filled in by the sections above)
# as a formatted table.
knitr::kable(res_rmse)
| 1 | 0.5044421 | 0.4831389 | 0.4951366 | 0.4816624 |
| 2 | 0.4953577 | 0.4699155 | 0.4879687 | 0.4792197 |
| 3 | 0.4985543 | 0.4682046 | 0.4923167 | 0.4865796 |
| 4 | 0.4898085 | 0.4565221 | 0.4833362 | 0.4759958 |